{
 "nbformat": 4,
 "nbformat_minor": 2,
 "metadata": {
  "language_info": {
   "name": "python",
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "version": "3.7.6-final"
  },
  "orig_nbformat": 2,
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "npconvert_exporter": "python",
  "pygments_lexer": "ipython3",
  "version": 3,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Report Date Adjustment\n",
    "This notebook explores how to adjust the date of reports from the ECDC, which are published at 10 AM CET. All datapoints in the report use the reported date, rather than the local date. This leads to most countries having a one-day difference between the date reported by the ECDC and the reports from local authorities."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "  CountryCode           CountryName      TimeZoneName TimeZoneOffset\n0          AD               Andorra    Europe/Andorra     UTC +02:00\n1          AE  United Arab Emirates        Asia/Dubai     UTC +04:00\n2          AF           Afghanistan        Asia/Kabul     UTC +04:30\n3          AG   Antigua and Barbuda   America/Antigua     UTC -04:00\n4          AI              Anguilla  America/Anguilla     UTC -04:00",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>CountryCode</th>\n      <th>CountryName</th>\n      <th>TimeZoneName</th>\n      <th>TimeZoneOffset</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>AD</td>\n      <td>Andorra</td>\n      <td>Europe/Andorra</td>\n      <td>UTC +02:00</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>AE</td>\n      <td>United Arab Emirates</td>\n      <td>Asia/Dubai</td>\n      <td>UTC +04:00</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>AF</td>\n      <td>Afghanistan</td>\n      <td>Asia/Kabul</td>\n      <td>UTC +04:30</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>AG</td>\n      <td>Antigua and Barbuda</td>\n      <td>America/Antigua</td>\n      <td>UTC -04:00</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>AI</td>\n      <td>Anguilla</td>\n      <td>America/Anguilla</td>\n      <td>UTC -04:00</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 1
    }
   ],
   "source": [
    "from pandas import read_csv\n",
    "tz = read_csv('../input/timezones.csv')\n",
    "tz.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Compute the UTC offset as a number to be able to perform math with it, e.g. UTC +02:00 -> 2.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "  CountryCode           CountryName      TimeZoneName TimeZoneOffset  \\\n0          AD               Andorra    Europe/Andorra     UTC +02:00   \n1          AE  United Arab Emirates        Asia/Dubai     UTC +04:00   \n2          AF           Afghanistan        Asia/Kabul     UTC +04:30   \n3          AG   Antigua and Barbuda   America/Antigua     UTC -04:00   \n4          AI              Anguilla  America/Anguilla     UTC -04:00   \n\n   UTCOffset  \n0        2.0  \n1        4.0  \n2        4.5  \n3       -4.0  \n4       -4.0  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>CountryCode</th>\n      <th>CountryName</th>\n      <th>TimeZoneName</th>\n      <th>TimeZoneOffset</th>\n      <th>UTCOffset</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>AD</td>\n      <td>Andorra</td>\n      <td>Europe/Andorra</td>\n      <td>UTC +02:00</td>\n      <td>2.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>AE</td>\n      <td>United Arab Emirates</td>\n      <td>Asia/Dubai</td>\n      <td>UTC +04:00</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>AF</td>\n      <td>Afghanistan</td>\n      <td>Asia/Kabul</td>\n      <td>UTC +04:30</td>\n      <td>4.5</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>AG</td>\n      <td>Antigua and Barbuda</td>\n      <td>America/Antigua</td>\n      <td>UTC -04:00</td>\n      <td>-4.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>AI</td>\n      <td>Anguilla</td>\n      <td>America/Anguilla</td>\n      <td>UTC -04:00</td>\n      <td>-4.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 2
    }
   ],
   "source": [
    "import re\n",
    "tz.TimeZoneOffset = tz.TimeZoneOffset.apply(lambda x: 'UTC +00:00' if x == 'UTC' else x)\n",
    "tz['UTCOffset'] = tz.TimeZoneOffset.apply(\n",
    "    lambda x: re.sub(r'(\\d\\d):(\\d\\d)', lambda g: str(int(g.group(1)) + int(g.group(2)) / 60), x[4:]))\n",
    "tz['UTCOffset'] = tz.UTCOffset.apply(lambda x: float(x))\n",
    "tz.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "CET is the timezone of the ECDC reports, to go from GMT (UTC) to CET just substract 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "  CountryCode           CountryName      TimeZoneName TimeZoneOffset  \\\n0          AD               Andorra    Europe/Andorra     UTC +02:00   \n1          AE  United Arab Emirates        Asia/Dubai     UTC +04:00   \n2          AF           Afghanistan        Asia/Kabul     UTC +04:30   \n3          AG   Antigua and Barbuda   America/Antigua     UTC -04:00   \n4          AI              Anguilla  America/Anguilla     UTC -04:00   \n\n   UTCOffset  CETOffset  \n0        2.0        0.0  \n1        4.0        2.0  \n2        4.5        2.5  \n3       -4.0       -6.0  \n4       -4.0       -6.0  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>CountryCode</th>\n      <th>CountryName</th>\n      <th>TimeZoneName</th>\n      <th>TimeZoneOffset</th>\n      <th>UTCOffset</th>\n      <th>CETOffset</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>AD</td>\n      <td>Andorra</td>\n      <td>Europe/Andorra</td>\n      <td>UTC +02:00</td>\n      <td>2.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>AE</td>\n      <td>United Arab Emirates</td>\n      <td>Asia/Dubai</td>\n      <td>UTC +04:00</td>\n      <td>4.0</td>\n      <td>2.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>AF</td>\n      <td>Afghanistan</td>\n      <td>Asia/Kabul</td>\n      <td>UTC +04:30</td>\n      <td>4.5</td>\n      <td>2.5</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>AG</td>\n      <td>Antigua and Barbuda</td>\n      <td>America/Antigua</td>\n      <td>UTC -04:00</td>\n      <td>-4.0</td>\n      <td>-6.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>AI</td>\n      <td>Anguilla</td>\n      <td>America/Anguilla</td>\n      <td>UTC -04:00</td>\n      <td>-4.0</td>\n      <td>-6.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 3
    }
   ],
   "source": [
    "tz['CETOffset'] = tz.UTCOffset - 2\n",
    "tz.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "ECDC reports come out at 10 AM CET, assuming that the data will be coming from a local authority reported by 8 PM local time we can compute the cutoff time for the data to be included in the ECDC report and derive a date offset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "  CountryCode           CountryName      TimeZoneName TimeZoneOffset  \\\n0          AD               Andorra    Europe/Andorra     UTC +02:00   \n1          AE  United Arab Emirates        Asia/Dubai     UTC +04:00   \n2          AF           Afghanistan        Asia/Kabul     UTC +04:30   \n3          AG   Antigua and Barbuda   America/Antigua     UTC -04:00   \n4          AI              Anguilla  America/Anguilla     UTC -04:00   \n\n   UTCOffset  CETOffset  ReportOffset  ReportOffsetDays  \n0        2.0        0.0          10.0                -1  \n1        4.0        2.0          12.0                -1  \n2        4.5        2.5          12.5                -1  \n3       -4.0       -6.0           4.0                -1  \n4       -4.0       -6.0           4.0                -1  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>CountryCode</th>\n      <th>CountryName</th>\n      <th>TimeZoneName</th>\n      <th>TimeZoneOffset</th>\n      <th>UTCOffset</th>\n      <th>CETOffset</th>\n      <th>ReportOffset</th>\n      <th>ReportOffsetDays</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>AD</td>\n      <td>Andorra</td>\n      <td>Europe/Andorra</td>\n      <td>UTC +02:00</td>\n      <td>2.0</td>\n      <td>0.0</td>\n      <td>10.0</td>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>AE</td>\n      <td>United Arab Emirates</td>\n      <td>Asia/Dubai</td>\n      <td>UTC +04:00</td>\n      <td>4.0</td>\n      <td>2.0</td>\n      <td>12.0</td>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>AF</td>\n      <td>Afghanistan</td>\n      <td>Asia/Kabul</td>\n      <td>UTC +04:30</td>\n      <td>4.5</td>\n      <td>2.5</td>\n      <td>12.5</td>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>AG</td>\n      <td>Antigua and Barbuda</td>\n      <td>America/Antigua</td>\n      <td>UTC -04:00</td>\n      <td>-4.0</td>\n      <td>-6.0</td>\n      <td>4.0</td>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>AI</td>\n      <td>Anguilla</td>\n      <td>America/Anguilla</td>\n      <td>UTC -04:00</td>\n      <td>-4.0</td>\n      <td>-6.0</td>\n      <td>4.0</td>\n      <td>-1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 4
    }
   ],
   "source": [
    "# \n",
    "tz['ReportOffset'] = tz.CETOffset + 10\n",
    "tz['ReportOffsetDays'] = tz.ReportOffset.apply(lambda x: (x - 20) // 24).astype(int)\n",
    "tz.head()"
   ]
  }
 ]
}